{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "x_dhQfFYXoPu"
      },
      "outputs": [],
      "source": [
        "import model_loader\n",
        "import pipeline\n",
        "from PIL import Image\n",
        "from pathlib import Path\n",
        "from transformers import CLIPTokenizer\n",
        "import torch\n",
        "\n",
        "DEVICE = \"cpu\"\n",
        "\n",
        "ALLOW_CUDA = False\n",
        "ALLOW_MPS = False\n",
        "\n",
        "if torch.cuda.is_available() and ALLOW_CUDA:\n",
        "    DEVICE = \"cuda\"\n",
        "elif (torch.has_mps or torch.backends.mps.is_available()) and ALLOW_MPS:\n",
        "    DEVICE = \"mps\"\n",
        "print(f\"Using device: {DEVICE}\")\n",
        "\n",
        "tokenizer = CLIPTokenizer(\"../data/vocab.json\", merges_file=\"../data/merges.txt\")\n",
        "model_file = \"../data/v1-5-pruned-emaonly.ckpt\"\n",
        "models = model_loader.preload_models_from_standard_weights(model_file, DEVICE)\n",
        "\n",
        "## TEXT TO IMAGE\n",
        "\n",
        "# prompt = \"A dog with sunglasses, wearing comfy hat, looking at camera, highly detailed, ultra sharp, cinematic, 100mm lens, 8k resolution.\"\n",
        "prompt = \"A cat stretching on the floor, highly detailed, ultra sharp, cinematic, 100mm lens, 8k resolution.\"\n",
        "uncond_prompt = \"\"  # Also known as negative prompt\n",
        "do_cfg = True\n",
        "cfg_scale = 8  # min: 1, max: 14\n",
        "\n",
        "## IMAGE TO IMAGE\n",
        "\n",
        "input_image = None\n",
        "# Comment to disable image to image\n",
        "image_path = \"../images/dog.jpg\"\n",
        "# input_image = Image.open(image_path)\n",
        "# Higher values means more noise will be added to the input image, so the result will further from the input image.\n",
        "# Lower values means less noise is added to the input image, so output will be closer to the input image.\n",
        "strength = 0.9\n",
        "\n",
        "## SAMPLER\n",
        "\n",
        "sampler = \"ddpm\"\n",
        "num_inference_steps = 50\n",
        "seed = 42\n",
        "\n",
        "output_image = pipeline.generate(\n",
        "    prompt=prompt,\n",
        "    uncond_prompt=uncond_prompt,\n",
        "    input_image=input_image,\n",
        "    strength=strength,\n",
        "    do_cfg=do_cfg,\n",
        "    cfg_scale=cfg_scale,\n",
        "    sampler_name=sampler,\n",
        "    n_inference_steps=num_inference_steps,\n",
        "    seed=seed,\n",
        "    models=models,\n",
        "    device=DEVICE,\n",
        "    idle_device=\"cpu\",\n",
        "    tokenizer=tokenizer,\n",
        ")\n",
        "\n",
        "# Combine the input image and the output image into a single image.\n",
        "Image.fromarray(output_image)"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "collapsed_sections": [
        "iDI2dKfRWTId"
      ],
      "provenance": []
    },
    "gpuClass": "standard",
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
